import numpy as np
import pandas as pd
import ast
from pandas import Series, DataFrame
import matplotlib.pyplot as plt
from matplotlib.ticker import StrMethodFormatter
from itertools import combinations
from scipy import stats
import seaborn as sns
import statsmodels.api as sm
from sklearn import linear_model as lm
from scipy.misc import factorial
import matplotlib
%matplotlib notebook
# Load the terrorism event table and keep a narrow copy holding only the
# location, target/attack type and casualty columns used further down.
df = pd.read_csv('globalTerrorism.csv', engine='python')
_dataset1_columns = ['country', 'country_txt', 'region', 'provstate', 'city',
                     'targtype1', 'attacktype1', 'nkill', 'nwound']
dataset1 = pd.concat([df[column] for column in _dataset1_columns], axis=1)
# Happiness table (2017 file of the World-Happiness-Report folder).
happiness = pd.read_csv('World-Happiness-Report/2017.csv', engine='python')
def plotHistDist(func, x, r, title, l, xlabel, ylabel):
    """Draw a normalised histogram of ``r`` and overlay a curve on it.

    Parameters
    ----------
    func : values of the curve to draw, one per entry of ``x``.
    x : x-coordinates of the curve.
    r : samples to histogram.
    title, xlabel, ylabel : text for the plot title and axis labels.
    l : legend label for the curve.
    """
    # ``normed=`` has been removed from ``plt.hist`` in current Matplotlib;
    # ``density=True`` is the replacement and normalises the histogram to
    # unit area so it is comparable with a density curve.
    plt.hist(r, density=True, histtype='stepfilled', alpha=0.2)
    plotDist(x, func, title, l, xlabel, ylabel)
#given functions
def plotDist(x, func, title, l, xlabel, ylabel):
    """Plot ``func`` against ``x`` on the current axes with a dashed y=0 line.

    ``l`` is the legend label of the curve; ``title``, ``xlabel`` and
    ``ylabel`` are applied to the current axes.
    """
    axes = plt.gca()
    axes.plot(x, func, 'b', lw=2, alpha=0.6, label=l)
    # Remember the x-range chosen for the curve so the baseline drawn next
    # spans exactly that range and cannot widen it.
    left, right = x_limits = axes.get_xlim()
    axes.hlines(0, left, right, linestyles='--', colors='#999999')
    axes.set_xlim(x_limits)
    axes.legend(loc='best', frameon=False)
    axes.set_xlabel(xlabel)
    axes.set_ylabel(ylabel)
    axes.set_title(title)
#Making dataFrame of counts of attacks in each country
terrCnt = dataset1.country_txt.value_counts()
# Name the counts explicitly instead of renaming a 'country_txt' column:
# value_counts() calls its result 'country_txt' on pandas < 2.0 but 'count'
# on pandas >= 2.0, so a columns-rename keyed on 'country_txt' silently does
# nothing on newer versions.
dfCnt = terrCnt.rename('NumberofAttacks').to_frame()
#Poisson distribution
# The number of attacks a country suffers in a five-year period is modelled as
# Poisson with rate equal to the mean count over nine historical windows.
_FIRST_WINDOW_START = 1973
_WINDOW_YEARS = 5
_NUM_WINDOWS = 9


def _five_year_attack_counts(events, country_code):
    """Count rows of ``events`` for ``country_code`` in each five-year window.

    Windows are consecutive and inclusive on both ends (1973-1977, 1978-1982,
    ... for nine windows).  Strict ``>``/``<`` bounds would make every window
    only four years long and silently skip 1977, 1982, and so on.  A window
    without any row contributes 0 instead of raising.
    """
    years = events.loc[events['country'] == country_code, 'iyear']
    counts = []
    for window in range(_NUM_WINDOWS):
        first_year = _FIRST_WINDOW_START + _WINDOW_YEARS * window
        last_year = first_year + _WINDOW_YEARS - 1
        counts.append(int(years.between(first_year, last_year).sum()))
    return counts


def _report_poisson(events, country_code, place, exact, threshold):
    """Print P(X == exact) and P(X > threshold) for one country.

    Returns ``(counts, mean)``: the per-window counts and the Poisson rate.
    """
    counts = _five_year_attack_counts(events, country_code)
    mean = sum(counts) / len(counts)
    print("The probability there will be exactly %d attacks in %s within the next five-year period:" % (exact, place))
    print(stats.poisson.pmf(exact, mean))
    print("\nThe probability there will be more than %d attacks in %s within the next five-year period:" % (threshold, place))
    # sf(k) = P(X > k).  Summing pmf(0..k-1) and subtracting from 1 yields
    # P(X >= k), which is not what the message above says.
    print(stats.poisson.sf(threshold, mean))
    return counts, mean


# (value of df['country'], name used in the message, "exactly" count, "more than" count)
for _code, _place, _exact, _threshold in (
        (217, "the US", 200, 200),
        (185, "Spain", 200, 250),
        (94, "Iran", 90, 50),
        (95, "Iraq", 2100, 2000)):
    arr, avg = _report_poisson(df, _code, _place, _exact, _threshold)
#NorthAmerica Happiness score vs number of attacks
# Events after 2013 in region code 1, counted per country.
No = df.loc[df['iyear'] > 2013]
NorthAmerica = No.loc[No['region'] == 1]
NorthAmericaTerrCnt = NorthAmerica.country_txt.value_counts()
# Name the counts explicitly: value_counts() labels its result 'country_txt'
# on pandas < 2.0 but 'count' on pandas >= 2.0, so renaming a 'country_txt'
# column would leave no 'NumberofAttacks' column on newer versions.
dfNA = NorthAmericaTerrCnt.rename('NumberofAttacks').to_frame()
happiness = pd.read_csv('World-Happiness-Report/2017.csv', engine = 'python')
Happiness = pd.concat([happiness['Country'],happiness['Happiness.Score']],axis=1)
phappiness = Happiness.set_index('Country')
# Inner join on the country-name index keeps only countries present in both tables.
combined = pd.concat([phappiness,dfNA],axis=1,join='inner')
slope, intercept, r_value, p_value, std_err = stats.linregress(combined['Happiness.Score'],combined['NumberofAttacks'])
graph = sns.regplot(x="Happiness.Score", y="NumberofAttacks", data=combined, color='b', line_kws={'label':"y={0:.1f}x+{1:.1f}".format(slope,intercept)})
# Without a legend the fitted-line label built above is never displayed.
graph.legend()
plt.show()
# %f rather than %d: %d truncates these floats to whole numbers.
print("Slope is %f" % slope)
print("Intercept is %f" % intercept)
print("R value is %f" % r_value)
print("P value is %f" % p_value)
print("Std Error is %f" % std_err)
#Happiness score vs wounded
# NOTE: this rebinds the global ``df`` to events after 2013; code further down
# reads ``df`` and therefore sees only those rows.
df = df.loc[df['iyear'] > 2013]
wound = df.loc[df['region'] == 10]
wound = pd.concat([wound['country_txt'],wound['nwound']],axis=1)
# Build the mask from ``wound`` itself; a mask taken from ``df`` only works
# through implicit index alignment between two different frames.
filtered = wound.loc[wound['nwound'] > 0]
pwound = filtered.set_index('country_txt')
Happiness = pd.concat([happiness['Country'],happiness['Happiness.Score']],axis=1)
phappiness = Happiness.set_index('Country')
# Total wounded per country, then inner-join with the happiness scores on
# the country-name index.
reducedPwound = pwound.groupby(pwound.index).sum()
wounded = pd.concat([phappiness,reducedPwound],axis=1,join='inner')
print(wounded)
slope, intercept, r_value, p_value, std_err = stats.linregress(wounded['Happiness.Score'],wounded['nwound'])
# Start a fresh figure so this scatter is not drawn over an earlier plot.
plt.figure()
ax = sns.regplot(x="Happiness.Score", y="nwound", data=wounded, color='b', line_kws={'label':"y={0:.1f}x+{1:.1f}".format(slope,intercept)})
ax.legend()
plt.show()
# %f rather than %d: %d truncates these floats to whole numbers.
print("Slope is %f" % slope)
print("Intercept is %f" % intercept)
print("R value is %f" % r_value)
print("P value is %f" % p_value)
print("Std Error is %f" % std_err)
# Happiness score vs number of attacks, one linear regression per region.
def _regress_attacks_on_happiness(events, scores, region_code, label):
    """Regress per-country attack counts on happiness score for one region.

    ``events`` needs 'region' and 'country_txt' columns; ``scores`` is indexed
    by country name and holds a 'Happiness.Score' column.  Draws the scatter
    with its fitted line on a new figure titled ``label``, prints the fit
    statistics and returns ``(combined, linregress_result)``.
    """
    in_region = events.loc[events['region'] == region_code]
    # Name the counts explicitly: value_counts() labels its result
    # 'country_txt' on pandas < 2.0 but 'count' on pandas >= 2.0.
    attacks = in_region.country_txt.value_counts().rename('NumberofAttacks').to_frame()
    # Inner join on the country-name index.
    merged = pd.concat([scores, attacks], axis=1, join='inner')
    fit = stats.linregress(merged['Happiness.Score'], merged['NumberofAttacks'])
    fit_slope, fit_intercept, fit_r, fit_p, fit_err = fit
    # One figure per region; otherwise every scatter lands on the same axes.
    plt.figure()
    axes = sns.regplot(x="Happiness.Score", y="NumberofAttacks", data=merged, color='b', line_kws={'label':"y={0:.1f}x+{1:.1f}".format(fit_slope, fit_intercept)})
    axes.set_title(label)
    axes.legend()
    plt.show()
    # %f rather than %d: %d truncates these floats to whole numbers.
    print("Slope is %f" % fit_slope)
    print("Intercept is %f" % fit_intercept)
    print("R value is %f" % fit_r)
    print("P value is %f" % fit_p)
    print("Std Error is %f" % fit_err)
    return merged, fit


No = df.loc[df['iyear'] > 2013]
happiness = pd.read_csv('World-Happiness-Report/2017.csv', engine = 'python')
Happiness = pd.concat([happiness['Country'],happiness['Happiness.Score']],axis=1)
phappiness = Happiness.set_index('Country')
# (value of the 'region' column, plot title).  The titles assume the region
# numbering of the GTD codebook -- TODO confirm against the CSV in use.
for _region_code, _region_label in (
        (1, "North America"),
        (10, "Middle East & North Africa"),
        (11, "Sub-Saharan Africa"),
        (8, "Western Europe"),
        (5, "Southeast Asia"),
        (3, "South America"),
        (9, "Eastern Europe")):
    combined, (slope, intercept, r_value, p_value, std_err) = _regress_attacks_on_happiness(No, phappiness, _region_code, _region_label)
# Happiness score vs total wounded, region code 10, events after 2013.
# NOTE: this rebinds the global ``df`` to events after 2013.
df = df.loc[df['iyear'] > 2013]
wound = df.loc[df['region'] == 10]
wound = pd.concat([wound['country_txt'],wound['nwound']],axis=1)
# Build the mask from ``wound`` itself; a mask taken from ``df`` only works
# through implicit index alignment between two different frames.
filtered = wound.loc[wound['nwound'] > 0]
pwound = filtered.set_index('country_txt')
Happiness = pd.concat([happiness['Country'],happiness['Happiness.Score']],axis=1)
phappiness = Happiness.set_index('Country')
# Total wounded per country, then inner-join with the happiness scores on
# the country-name index.
reducedPwound = pwound.groupby(pwound.index).sum()
wounded = pd.concat([phappiness,reducedPwound],axis=1,join='inner')
print(wounded)
slope, intercept, r_value, p_value, std_err = stats.linregress(wounded['Happiness.Score'],wounded['nwound'])
# Start a fresh figure so this scatter is not drawn over an earlier plot.
plt.figure()
ax = sns.regplot(x="Happiness.Score", y="nwound", data=wounded, color='b', line_kws={'label':"y={0:.1f}x+{1:.1f}".format(slope,intercept)})
ax.legend()
plt.show()
# %f rather than %d: %d truncates these floats to whole numbers.
print("Slope is %f" % slope)
print("Intercept is %f" % intercept)
print("R value is %f" % r_value)
print("P value is %f" % p_value)
print("Std Error is %f" % std_err)
import pandas as pd
import numpy as np
from pandas import Series, DataFrame
import matplotlib.pyplot as plt
import matplotlib
gbterr = df.copy()
# Keep only events after 2013 (iyear >= 2014), i.e. the years leading up to
# the 2017 happiness file loaded below.
gbterr2017 = gbterr[gbterr.iyear >2013]
# First CSV column becomes the index of the happiness table.
happiness = pd.read_csv('World-Happiness-Report/2017.csv', engine = 'python', index_col=0)
#lets visualize happiness scores
# (The former get_current_fig_manager()/show_popup("True") pair was dropped:
# show_popup no longer exists in current Matplotlib, and fetching the manager
# only created an extra empty figure.)
fig = plt.figure()
fig.set_size_inches(50,50)
# The plot kind must be passed by keyword: Series.plot() rejects positional
# arguments on pandas >= 1.0.
happiness['Happiness.Score'].plot(kind='bar')
plt.show()
matplotlib.rcParams.update({'font.size': 35})
fig = plt.figure()
fig.set_size_inches(50,100)
happiness['Happiness.Score'].plot(kind='barh')
#now from the gbterr2017 dataframe (events after 2013) we get the number of attacks per country
tattacks_count = gbterr2017['country_txt'].value_counts()
tattacks_count.head()
#bar graph of terrorism attacks per country
fig = plt.figure()
matplotlib.rcParams.update({'font.size': 15})
fig.set_size_inches(40,30)
# The plot kind must be passed by keyword: Series.plot() rejects positional
# arguments on pandas >= 1.0.
tattacks_count.plot(kind="bar")
plt.show()
#now we look for a correlation between the number of terrorist attacks in a country (events after 2013) and that country's happiness score as well as other variables in the happiness database
h_score = pd.Series(happiness['Happiness.Score'])
tattacks_count.index.name = "Country"
# Name the counts explicitly: value_counts() labels its result 'country_txt'
# on pandas < 2.0 but 'count' on pandas >= 2.0, so renaming a 'country_txt'
# column would leave no 'NumOfAttacks' column on newer versions.
tattacks_count = tattacks_count.rename("NumOfAttacks").to_frame()
# 'Country' is the index name on both sides; merge() resolves it as an index level.
a_h = pd.merge(happiness,tattacks_count, on='Country')
dfcorr = a_h[["Happiness.Score", "NumOfAttacks","Economy..GDP.per.Capita.", "Freedom","Trust..Government.Corruption.","Dystopia.Residual","Family","Health..Life.Expectancy." ]]
dfcorr.corr()
f,ax = plt.subplots(figsize=(25, 18))
sns.heatmap(dfcorr.corr(), annot=True, linewidths=.5, fmt= '.1f',ax=ax)
plt.show()
# UN country profiles, indexed by the first CSV column (renamed "Country").
un_report = pd.read_csv('undata-country-profiles/country_profile_variables.csv', engine = 'python', index_col= 0)
un_report.index.name = "Country"
un_report.head()
un_report["GDP per capita (current US$)"].head()
# Low income: GDP per capita up to and including 1035.  A strict '<' would
# leave a value of exactly 1035 outside every income class.
low_income = un_report[un_report["GDP per capita (current US$)"] <= 1035]
# Non-positive values are dropped (presumably missing-data placeholders -- TODO confirm).
li_gdpc = low_income[low_income["GDP per capita (current US$)"] >0]
li_gdpc = li_gdpc["GDP per capita (current US$)"]
li_gdpc = li_gdpc.sort_values()
matplotlib.rcParams.update({'font.size': 30})
fig = plt.figure()
fig.set_size_inches(50,20)
# The plot kind must be passed by keyword: Series.plot() rejects positional
# arguments on pandas >= 1.0.
li_gdpc.plot(kind="bar")
li_gdpc.index
h_score.index
# reindex() yields NaN for countries without a happiness score (removed by
# dropna() below); h_score[labels] raises KeyError for missing labels on
# pandas >= 1.0.
h_score_lowincome = h_score.reindex(li_gdpc.index)
h_score_lowincome = h_score_lowincome.dropna()
h_score_lowincome = h_score_lowincome.sort_values()
h_score_lowincome.head()
h_score_lowincome
matplotlib.rcParams.update({'font.size': 30})
fig = plt.figure()
fig.set_size_inches(50,20)
h_score_lowincome.plot(kind="bar")
h_score_lowincome.describe()
# Per-country attack counts again as a Series, indexed by country name.
tattacks_count = gbterr2017['country_txt'].value_counts()
tattacks_count.index.name = "Country"
tattacks_count.name = "Terrorist Attacks Count"
# reindex() yields NaN for countries without recorded attacks (removed by
# dropna() below); tattacks_count[labels] raises KeyError for missing labels
# on pandas >= 1.0.
tattacks_lowincome = tattacks_count.reindex(h_score_lowincome.index)
tattacks_lowincome = tattacks_lowincome.dropna()
tattacks_lowincome = tattacks_lowincome.sort_values()
tattacks_lowincome.head()
matplotlib.rcParams.update({'font.size': 30})
fig = plt.figure()
fig.set_size_inches(50,20)
# The plot kind must be passed by keyword: Series.plot() rejects positional
# arguments on pandas >= 1.0.
tattacks_lowincome.plot(kind="bar")
tattacks_lowincome.describe()
tattacks_lowincome.corr(h_score_lowincome)
# Attach the counts to the happiness table (aligned on the country index) and
# show the correlation matrix as a heat map.
lowincomea_h = happiness.copy()
lowincomea_h["Attacks"] = tattacks_lowincome
lowincomea_h.corr()
f,ax = plt.subplots(figsize=(25, 18))
sns.heatmap(lowincomea_h.corr(), annot=True, linewidths=.5, fmt= '.1f',ax=ax)
plt.show()
# Lower-middle income: 1035 < GDP per capita <= 4085.  The upper bound is
# inclusive so a value of exactly 4085 still belongs to this class.
low_middle_income = un_report[un_report["GDP per capita (current US$)"] > 1035 ]
low_middle_income = low_middle_income[low_middle_income["GDP per capita (current US$)"] <= 4085]
lmi_gdpc = low_middle_income["GDP per capita (current US$)"]
lmi_gdpc = lmi_gdpc.sort_values()
matplotlib.rcParams.update({'font.size': 40})
fig = plt.figure()
fig.set_size_inches(50,20)
# The plot kind must be passed by keyword: Series.plot() rejects positional
# arguments on pandas >= 1.0.
lmi_gdpc.plot(kind="bar")
lmi_gdpc.describe()
matplotlib.rcParams.update({'font.size': 10})
# reindex() yields NaN for countries missing from h_score (removed by
# dropna() below); h_score[labels] raises KeyError for missing labels on
# pandas >= 1.0.
lmi_hscore = h_score.reindex(lmi_gdpc.index)
lmi_hscore = lmi_hscore.sort_values()
lmi_hscore =lmi_hscore.dropna()
lmi_hscore.head()
lmi_hscore.name= "Happiness Score"
matplotlib.rcParams.update({'font.size': 30})
fig = plt.figure()
fig.set_size_inches(50,20)
lmi_hscore.plot(kind="bar")
lmi_hscore.describe()
# Same label-tolerant lookup for the attack counts.
tattacks_lmi = tattacks_count.reindex(lmi_hscore.index)
tattacks_lmi = tattacks_lmi.dropna()
tattacks_lmi =tattacks_lmi.sort_values()
matplotlib.rcParams.update({'font.size': 30})
fig = plt.figure()
fig.set_size_inches(50,20)
tattacks_lmi.plot(kind="bar")
tattacks_lmi.describe()
tattacks_lmi.head()
lmi_hscore.head()
lmi_hscore.corr(tattacks_lmi)
# Attach the counts to the happiness table (aligned on the country index) and
# show the correlation matrix as a heat map.
lowerMiddleincomea_h = happiness.copy()
lowerMiddleincomea_h["Attacks"] = tattacks_lmi
f,ax = plt.subplots(figsize=(25, 18))
sns.heatmap(lowerMiddleincomea_h.corr(), annot=True, linewidths=.5, fmt= '.1f',ax=ax)
plt.show()
# Upper-middle income: 4085 < GDP per capita <= 12615.  GDP per capita is not
# restricted to whole numbers, so bounds of '> 4086' and '< 12615' would drop
# every value in (4085, 4086] as well as exactly 12615.
upper_middle_income = un_report[un_report["GDP per capita (current US$)"] > 4085 ]
upper_middle_income = upper_middle_income[upper_middle_income["GDP per capita (current US$)"] <= 12615]
umi_gdpc = upper_middle_income["GDP per capita (current US$)"]
umi_gdpc = umi_gdpc.sort_values()
matplotlib.rcParams.update({'font.size': 40})
fig = plt.figure()
fig.set_size_inches(50,20)
# The plot kind must be passed by keyword: Series.plot() rejects positional
# arguments on pandas >= 1.0.
umi_gdpc.plot(kind="bar")
umi_gdpc.describe()
matplotlib.rcParams.update({'font.size': 20})
# reindex() yields NaN for countries missing from h_score (removed by
# dropna() below); h_score[labels] raises KeyError for missing labels on
# pandas >= 1.0.
umi_hscore = h_score.reindex(umi_gdpc.index)
umi_hscore = umi_hscore.dropna()
umi_hscore = umi_hscore.sort_values()
fig = plt.figure()
fig.set_size_inches(30,20)
umi_hscore.plot(kind="bar")
umi_hscore.describe()
# Same label-tolerant lookup for the attack counts.
tattacks_umi = tattacks_count.reindex(umi_hscore.index)
tattacks_umi = tattacks_umi.dropna()
tattacks_umi = tattacks_umi.sort_values()
matplotlib.rcParams.update({'font.size': 15})
fig = plt.figure()
fig.set_size_inches(20,20)
tattacks_umi.plot(kind="bar")
tattacks_umi.describe()
tattacks_umi.head()
umi_hscore.head()
tattacks_umi.corr(umi_hscore)
# Attach the counts to the happiness table, keep only complete rows and show
# the correlation matrix as a heat map.
upperMiddleincomea_h = happiness.copy()
upperMiddleincomea_h["Attacks"] = tattacks_umi
upperMiddleincomea_h = upperMiddleincomea_h.dropna()
upperMiddleincomea_h
f,ax = plt.subplots(figsize=(25, 18))
sns.heatmap(upperMiddleincomea_h.corr(), annot=True, linewidths=.5, fmt= '.1f',ax=ax)
plt.show()
#note that the only important line in the heat map above is the last line.
# High income: GDP per capita above 12615.
high_income = un_report[un_report["GDP per capita (current US$)"] > 12615]
hi_gdpc = high_income["GDP per capita (current US$)"]
hi_gdpc = hi_gdpc.sort_values()
matplotlib.rcParams.update({'font.size': 40})
fig = plt.figure()
fig.set_size_inches(50,50)
# The plot kind must be passed by keyword: Series.plot() rejects positional
# arguments on pandas >= 1.0.
hi_gdpc.plot(kind="bar")
hi_gdpc.describe()
# reindex() yields NaN for countries missing from h_score (removed by
# dropna() below); h_score[labels] raises KeyError for missing labels on
# pandas >= 1.0.
highIncome_hscore = h_score.reindex(hi_gdpc.index)
# Added by hand under the names used in the happiness table (presumably the
# UN table spells these three countries differently -- TODO confirm).
highIncome_hscore["United States"] = h_score["United States"]
highIncome_hscore["Czech Republic"] = h_score["Czech Republic"]
highIncome_hscore["South Korea"] = h_score["South Korea"]
highIncome_hscore = highIncome_hscore.dropna()
highIncome_hscore = highIncome_hscore.sort_values()
matplotlib.rcParams.update({'font.size': 30})
fig = plt.figure()
fig.set_size_inches(50,50)
highIncome_hscore.plot(kind="bar")
highIncome_hscore.describe()
# Same label-tolerant lookup for the attack counts.
tattacks_hi = tattacks_count.reindex(highIncome_hscore.index)
tattacks_hi = tattacks_hi.dropna()
tattacks_hi = tattacks_hi.sort_values()
matplotlib.rcParams.update({'font.size': 20})
fig = plt.figure()
fig.set_size_inches(30,20)
tattacks_hi.plot(kind="bar")
tattacks_hi.describe()
tattacks_hi.head()
highIncome_hscore.head()
tattacks_hi.corr(highIncome_hscore)
# Attach the counts to the happiness table, keep only complete rows and show
# the correlation matrix as a heat map.
Highincomea_h = happiness.copy()
Highincomea_h["Attacks"] = tattacks_hi
Highincomea_h = Highincomea_h.dropna()
f,ax = plt.subplots(figsize=(25, 18))
sns.heatmap(Highincomea_h.corr(), annot=True, linewidths=.5, fmt= '.1f',ax=ax)
plt.show()
tattacks_hi.sum()
tattacks_umi.sum()
tattacks_lmi.sum()
tattacks_lowincome.sum()
# Total attacks per income class.  The Series is built directly from numbers:
# pd.Series(index=[...]) without data or dtype is an object-dtype Series on
# current pandas and cannot be bar-plotted.
classes = pd.Series(
    {
        "Low Income": tattacks_lowincome.sum(),
        "Lower Middle Income": tattacks_lmi.sum(),
        # Iraq is left out of this class total (leave or remove Iraq???).
        # drop(..., errors="ignore") subtracts it only if it is actually in
        # the class, instead of subtracting its count unconditionally.
        "Upper Middle Income": tattacks_umi.drop("Iraq", errors="ignore").sum(),
        "High Income": tattacks_hi.sum(),
    },
    dtype=float,
)
classes.index.name="Classification"
classes.name="Total Terrorist Attacks"
classes
# The plot kind must be passed by keyword: Series.plot() rejects positional
# arguments on pandas >= 1.0.
classes.plot(kind="bar")
highIncome_hscore.mean()
highIncome_hscore.std()
from bokeh.io import output_file, output_notebook, show
from bokeh.io import output_file, show
from bokeh.models import ColumnDataSource, GMapOptions
from bokeh.plotting import gmap
import os

# Reload the full event table (``df`` was narrowed to recent years earlier).
df = pd.read_csv('globalTerrorism.csv', engine = 'python')
# Events without coordinates cannot be placed on the map; drop them as pairs
# so the latitude and longitude lists stay aligned.
_located = df[['latitude', 'longitude']].dropna()
latitude_list = _located['latitude'].tolist()
longitude_list = _located['longitude'].tolist()
map_options = GMapOptions(lat=29, lng=-98, map_type="roadmap", zoom=7)
# SECURITY: a Google Maps API key is committed in source.  GOOGLE_MAPS_API_KEY
# overrides it when set; the literal fallback is kept only so existing runs
# keep working -- rotate the key and delete the literal.
_gmaps_api_key = os.environ.get("GOOGLE_MAPS_API_KEY", "AIzaSyAx7GNIhrPKKX7LPlC84iGc3Mg-8_uKdbI")
p = gmap(_gmaps_api_key, map_options, title="Terrorist Attacks")
source = ColumnDataSource(
    data=dict(lat=latitude_list,
              lon=longitude_list)
)
p.circle(x="lon", y="lat", size=15, fill_color="blue", fill_alpha=0.8, source=source)
output_notebook()
show(p)